{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33af8f39-ec00-497e-9e82-f5f7271861bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, time\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a3dd38c-42f8-408d-825a-ee8d7273d6ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "os.chdir(\"/Users/xiaosongw/Dropbox/Research/InformedSources/Replication/Analysis\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c459f6cf-20f6-4ad3-a860-39081f28cc15",
   "metadata": {},
   "outputs": [],
   "source": [
    "l_d_pre = pd.date_range('2015-05-01', '2016-03-01')[0:-1]\n",
    "l_d_pos = pd.date_range('2016-05-01', '2017-03-01')[0:-1]\n",
    "l_b = ['BP', 'Caltex', 'Coles', 'Woolworths', '7-Eleven', 'Other']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "85756c34-9a80-479f-aa15-30ecf9f0bc78",
   "metadata": {},
   "source": [
    "# price data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c6a932a-4a4c-443c-8b6a-4930831e22a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_p = pd.read_csv(\"../Build/Output/p_in.csv\")\n",
    "df_p['t'] = pd.to_datetime(df_p['t'])\n",
    "d_id_bn = df_p.groupby('id')['bid'].last().to_dict()\n",
    "d_c = df_p.groupby('t')['c'].first().to_dict()\n",
    "df_p['marg'] = df_p['pf'] - df_p['c']\n",
    "df_p['y'] = df_p['t'].dt.year\n",
    "print(df_p['t'].min(), df_p['t'].max())\n",
    "df_p.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6a965170-9083-45d0-868a-bce8a78127f1",
   "metadata": {},
   "source": [
    "# predicted market share"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e568ceed-ac78-432d-9910-ea1784fbe80a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"./Output/calibration_mks_pre_post.csv\")\n",
    "\n",
    "df['t'] = pd.to_datetime(df['t'])\n",
    "print(df.loc[df['t'].dt.year==2016, 'elas'].mean())\n",
    "df = df.merge(df_p[['t', 'id', 'bid']], on=['id', 't'], how='left')\n",
    "df['year'] = df['t'].dt.year\n",
    "df['c'] = df['t'].map(d_c)\n",
    "df['marg'] = df['p'] / 35 - df['c'] / 100\n",
    "df['rev'] = df['shr_w'] * df['marg']\n",
    "\n",
    "# pre and post treatment dummy\n",
    "df['post'] = np.nan\n",
    "df.loc[df['t'].isin(l_d_pre), 'post'] = 'Sym'\n",
    "df.loc[df['t'].isin(l_d_pos), 'post'] = 'Asym'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1007a18c-7697-437f-b4cf-53cf1aa11dea",
   "metadata": {},
   "source": [
    "# predict daily total volume"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bbd9a521-717f-4d36-9a39-8cbc1925632e",
   "metadata": {},
   "source": [
    "## quarterly volume in melbourne"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbbe6f54-1ab0-42ee-a06a-e0d1645312ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_qtot = pd.read_stata(\"../Build/Output/voltot_monthly.dta\")\n",
    "df_qtot['y'] = df_qtot['month'].dt.year\n",
    "df_qtot['qoy'] = (df_qtot['month'].dt.month - 1) // 3 + 1\n",
    "df_qtot['quarter'] = (df_qtot['y']).astype(str)+'q'+df_qtot['qoy'].astype(str)\n",
    "df_mel_qtot = df_qtot.groupby(['y', 'quarter', 'qoy']).agg({'month':'min', 'volmel':'sum'}).reset_index()\n",
    "df_mel_qtot.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f16096ce-9ba0-4438-84a6-9cea12f3d59b",
   "metadata": {},
   "outputs": [],
   "source": [
    "vol2015 = df_mel_qtot.loc[df_mel_qtot['month'].dt.year==2015, 'volmel'].sum()\n",
    "print(f\"annual melnourne volume in 2015 is {vol2015:.2f}\")\n",
    "vol2017 = df_mel_qtot.loc[df_mel_qtot['month'].dt.year==2017, 'volmel'].sum()\n",
    "print(f\"annual melnourne volume in 2017 is {vol2017:.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ff5a028e-0cb0-49f1-9dc0-7b42a59e895f",
   "metadata": {},
   "source": [
    "## daily volume share "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f4faa62-40ba-4ba7-a82f-e03841591ecf",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_qshr = pd.read_stata(\"../Build/Output/volshare_daily.dta\")\n",
    "df_qshr['t'] = pd.to_datetime(df_qshr['t'])\n",
    "df_qshr = df_qshr[df_qshr['t']<'2016-10-01'].copy()\n",
    "df_qshr['w'] = df_qshr['volshare'] / df_qshr['volshare'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e0faa3b-1f6d-4cf1-9fe3-a60c6d11aa9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_in = df_p.groupby(['t', 'y']).agg(p2=('p','mean'), c=('c', 'first')).reset_index()\n",
    "df_in['dow'] = df_in['t'].dt.dayofweek\n",
    "df_in['peak'] = (df_in['p2'] == df_in['p2'].rolling(window=11, center=True).max()).astype(int)\n",
    "df_in['bottom'] = (df_in['p2'] == df_in['p2'].rolling(window=11, center=True).min()).astype(int)\n",
    "df_in['w'] = df_in['t'].map(df_qshr.set_index('t')['w'])\n",
    "df_in['lnw'] = np.log(df_in['w'])\n",
    "df_in['dp'] = df_in['p2'].diff()\n",
    "df_in['dp_pos'] = 0\n",
    "df_in.loc[df_in['dp']>0, 'dp_pos'] = df_in['dp']\n",
    "df_in['dp_neg'] = 0\n",
    "df_in.loc[df_in['dp']<0, 'dp_neg'] = df_in['dp']\n",
    "for i in range(1, 7):\n",
    "        df_in['dpF{}_pos'.format(abs(i))] = df_in['dp_pos'].shift(-i)\n",
    "        df_in['dpF{}_neg'.format(abs(i))] = df_in['dp_neg'].shift(-i)\n",
    "for i in range(1, 7):\n",
    "        df_in['dpL{}_pos'.format(abs(i))] = df_in['dp_pos'].shift(i)\n",
    "        df_in['dpL{}_neg'.format(abs(i))] = df_in['dp_neg'].shift(i)\n",
    "        \n",
    "df_in['qoy'] = (df_in['t'].dt.month - 1) // 3 + 1\n",
    "df_in['qoy_ndays'] = df_in['qoy'].map({1:90, 2:91, 3:92, 4:92})\n",
    "df_in['nobs'] = df_in.groupby(['y', 'qoy'])['t'].transform('count')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14d67ac1-7408-4bfe-b26d-604f408f65be",
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.formula.api as smf\n",
    "from statsmodels.iolib.summary2 import summary_col\n",
    "\n",
    "d_m = {'L':[], 'F':[], 'r2':[], 'aic':[]}\n",
    "for i in [2,3,4,5,6,7]:\n",
    "    for j in [2,3,4]:\n",
    "            m = smf.ols('lnw ~ C(dow) + dp_pos + dp_neg + ' \n",
    "                + ' + '.join(['dpF{}_pos + dpF{}_neg'.format(jj, jj) for jj in range(1, j)])\n",
    "                + ' + '\n",
    "                + ' + '.join(['dpL{}_pos + dpL{}_neg'.format(ii, ii) for ii in range(1, i)])\n",
    "                        , data=df_in).fit()\n",
    "            d_m['L'].append(i-1)\n",
    "            d_m['F'].append(j-1)\n",
    "            d_m['r2'].append(m.rsquared)\n",
    "            d_m['aic'].append(m.aic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ba8ebd5-6bbb-4428-a643-fd47d5351922",
   "metadata": {},
   "outputs": [],
   "source": [
    "m2 = smf.ols('lnw ~ C(dow) + dp_pos + dp_neg + ' \n",
    "            + ' + '.join(['dpF{}_pos + dpF{}_neg'.format(i, i) for i in range(1, 3)])\n",
    "            + ' + '\n",
    "            + ' + '.join(['dpL{}_pos + dpL{}_neg'.format(j, j) for j in range(1, 5)]), data=df_in).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6500e56-e8d2-41a3-93f0-9be8f58b7fbc",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_m = pd.DataFrame(d_m)\n",
    "df_m[['R-Squared', 'AIC']] = df_m[['r2', 'aic']].round(3)\n",
    "print(df_m.loc[(df_m['L'].isin([3,4,5]))&(df_m['F'].isin([2,3])), \n",
    "               ['L', 'F', 'R-Squared', 'AIC']].style.hide(axis=\"index\").to_latex(hrules=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e55a1c7a-0e5c-4629-b5b0-98ae2339800a",
   "metadata": {},
   "outputs": [],
   "source": [
    "m2 = smf.ols('lnw ~ C(dow) + dp_pos + dp_neg + ' \n",
    "            + ' + '.join(['dpF{}_pos + dpF{}_neg'.format(i, i) for i in range(1, 3)])\n",
    "            + ' + '\n",
    "            + ' + '.join(['dpL{}_pos + dpL{}_neg'.format(j, j) for j in range(1, 5)]), data=df_in).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28691ba3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# m2.params.to_csv('./Temp/parms.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f6f2180-76c8-4d61-a737-c9965dbe34b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_in['lnw_m1'] = m2.predict(df_in)\n",
    "df_in['w_m1'] = np.exp(df_in['lnw_m1'])\n",
    "df_in['w_m1_adj'] = (df_in['w_m1'] / \n",
    "                      (df_in.groupby(['y', 'qoy'])['w_m1'].transform('sum') \n",
    "                       / df_in['nobs'] * df_in['qoy_ndays']))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "80651e7b-8b12-409b-91e4-efda9388d3af",
   "metadata": {},
   "source": [
    "## daily volume "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2bf86a4a-8662-49fc-8497-44e65c540ecb",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_Q = df_in[['t', 'y', 'qoy', 'w_m1_adj']].merge(\n",
    "    df_mel_qtot[['y', 'qoy', 'volmel']], on=['y', 'qoy'], how='left')\n",
    "df_Q['qt'] = df_Q['volmel'] * df_Q['w_m1_adj']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10f433e3-c46c-4420-93ca-ba76d4be7786",
   "metadata": {},
   "outputs": [],
   "source": [
    "daily_vol2016 = df_Q.loc[df_Q['y']==2016, 'qt'].mean()\n",
    "print(f\"average daily volume in melbourne is {daily_vol2016:.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2a262cc-4377-4e74-9d37-cad60ac8e965",
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Q'] = df['t'].map(df_Q.set_index('t')['qt'])\n",
    "df['s'] = df['shr_w'] / 1e3\n",
    "df['q'] = df['Q'] * df['s']\n",
    "df['prof'] = df['marg'] * df['q']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ec47154a-abda-48ab-9fa5-a316dd9480f9",
   "metadata": {},
   "source": [
    "# unweighted margin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7746ae2d-6bce-4216-9d52-b91b5cd3b0ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "tab_marg = df.groupby(['bid', 'post'])[['marg']].mean().reset_index().pivot_table(\n",
    "    index='bid', columns='post', values='marg')\n",
    "tab_marg.index.name = None\n",
    "tab_marg.columns.name = None\n",
    "tab_marg.columns = ['marg_'+i for i in tab_marg.columns]\n",
    "tab_marg = tab_marg * 100\n",
    "tab_marg['dmarg'] = (tab_marg['marg_Asym'] - tab_marg['marg_Sym'])\n",
    "tab_marg['pdmarg'] = tab_marg['dmarg'] / tab_marg['marg_Sym'] * 100\n",
    "tab_marg = tab_marg.loc[l_b, ['marg_Sym', 'marg_Asym', 'dmarg', 'pdmarg']]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d32a9ef-12ce-4ded-81d4-842419b74e7d",
   "metadata": {},
   "source": [
    "# weighted margin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55b4e619-30ac-44ea-ae32-95b84f7c8b5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_margw = df.groupby(['bid', 'post'])[['prof', 'q']].sum().reset_index()\n",
    "df_margw['marg_w'] = df_margw['prof'] / df_margw['q']\n",
    "tab_margw = df_margw.pivot_table(index='bid', columns='post', values='marg_w')\n",
    "tab_margw.columns.name = None\n",
    "tab_margw.index.name = None\n",
    "tab_margw = tab_margw.loc[l_b, ['Sym', 'Asym']] * 100\n",
    "tab_margw.columns = ['margw_'+i for i in tab_margw.columns]\n",
    "tab_margw['dmargw'] = tab_margw['margw_Asym'] - tab_margw['margw_Sym']\n",
    "tab_margw['pdmargw'] = (tab_margw['dmargw'] / tab_margw['margw_Sym']) * 100\n",
    "tab_margw"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d4453780-79b7-4bcd-8cb0-3d915332665b",
   "metadata": {},
   "source": [
    "# daily profit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f50fa95-c156-41f7-ae19-283b1f056cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_prof_b_t = df.groupby(['t', 'bid', 'post'])['prof'].sum().reset_index()\n",
    "tab_prof = df_prof_b_t.groupby(['bid', 'post'])['prof'].mean().reset_index().pivot_table(\n",
    "    index='bid', columns='post', values='prof') * 1000\n",
    "tab_prof.index.name = None\n",
    "tab_prof.columns.name = None\n",
    "tab_prof.columns = ['prof_'+i for i in tab_prof.columns]\n",
    "tab_prof = tab_prof.loc[l_b, ['prof_Sym', 'prof_Asym']]\n",
    "tab_prof['dprof'] = tab_prof['prof_Asym'] - tab_prof['prof_Sym']\n",
    "tab_prof['pdprof'] = (tab_prof['dprof'] / tab_prof['prof_Sym']) * 100\n",
    "tab_prof"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e0b0bdf-b7f3-426d-8994-fba2c7de674b",
   "metadata": {},
   "outputs": [],
   "source": [
    "tab_out = pd.concat([tab_marg[['marg_Sym', 'marg_Asym', 'pdmarg']], \n",
    "                     tab_margw[['margw_Sym', 'margw_Asym', 'pdmargw']], \n",
    "                     tab_prof[['prof_Sym', 'prof_Asym', 'pdprof']]], axis=1).copy()\n",
    "mapper =  {'marg_Sym': '{0:.2f}', 'marg_Asym': '{0:.2f}', 'pdmarg': '{0:.0f}\\%', \n",
    "           'margw_Sym': '{0:.2f}', 'margw_Asym': '{0:.2f}', 'pdmargw': '{0:.0f}\\%',\n",
    "           'prof_Sym': '\\${0:.2f}K', 'prof_Asym': '\\${0:.2f}K', 'pdprof': '{0:.0f}\\%'}\n",
    "tab_out = tab_out.apply(lambda x: x.apply(mapper[x.name].format))\n",
    "tab_out.rename(columns={'marg_Sym':'Sym', 'marg_Asym':'Asym', 'pdmarg':'\\%$\\Delta$', \n",
    "                        'margw_Sym':'Sym', 'margw_Asym':'Asym', 'pdmargw':'\\%$\\Delta$', \n",
    "                        'prof_Sym':'Sym', 'prof_Asym':'Asym', 'pdprof':'\\%$\\Delta$'}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00131c56-fbdc-4541-89db-66701d6cd845",
   "metadata": {},
   "outputs": [],
   "source": [
    "tab_tex = tab_out.style.to_latex(hrules=True)\n",
    "l_tex = tab_tex.splitlines()\n",
    "l_tex[0] = '\\\\begin{tabular}{lrrrrrrrrr}'\n",
    "l_tex.insert(2, \"\"\"{} & \\\\multicolumn{3}{c}{Unweighted} & \\\\multicolumn{3}{c}{Weighted} \n",
    "                      & \\\\multicolumn{3}{c}{} \\\\\\\\\"\"\")\n",
    "l_tex.insert(3, \"\"\"{} & \\\\multicolumn{3}{c}{Margin (cpl)} & \\\\multicolumn{3}{c}{Margin(cpl)} \n",
    "                      & \\\\multicolumn{3}{c}{Mean Daily Profit (\\\\$)} \\\\\\\\\"\"\")\n",
    "l_tex.insert(4, \"\"\"\\\\cmidrule(lr){2-4} \\\\cmidrule(lr){5-7} \\\\cmidrule(lr){8-10}\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69ec41b6-ac71-4484-ba47-279dda4b88a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('\\n'.join(l_tex))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d742449-dcc8-40dd-877c-2372bdc3dc1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"../Analysis/Output/tab3_calibration.tex\", \"w\") as f:\n",
    "    f.write('\\n'.join(l_tex))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
